# %%
from netCDF4 import Dataset
from datetime import datetime, timedelta
import pandas as pd
import numpy as np
from glob import glob


def inst_type(stn):
    """Classify a station identifier as a Pandora or a MAX-DOAS instrument.

    Args:
        stn (str): station identifier (e.g., 'P150' or 'UB_Ulsan').

    Returns:
        str: "Pandora" when ``stn`` begins with 'P' immediately followed by
            a digit, "MAX-DOAS" otherwise.
    """
    import re

    # re.match only anchors at the start of the string, so 'XP1' is not
    # treated as a Pandora identifier.
    looks_like_pandora = re.match("P[0-9]", stn) is not None
    return "Pandora" if looks_like_pandora else "MAX-DOAS"


#####################################MAX-DOAS#####################################
class DATA:
    """Container for the variables read from one opened MAX-DOAS file.

    The constructor reads from ``f.groups['PROFILE']`` and its nested
    ``'MMF'`` group. Which species groups are read is decided by
    ``var_name(f.channel_type)``; a species whose group name is ``None`` is
    skipped, so the matching attributes are simply not created.

    Attributes set when available:
        channel_type: copied from ``f.channel_type``.
        no2_vis_varname, no2_uv_varname, hcho_varname: group names returned
            by ``var_name`` (may be ``None``).
        no2_vis, no2_uv, hcho: 'tropospheric_vertical_column_density' of the
            corresponding MMF sub-group.
        no2_vis_qf, no2_uv_qf, hcho_qf: 'qa_flag_<group name lowercased>'
            variables of the PROFILE group.
        scantime: PROFILE variable 'date_of_retrieval'.
        vaa: PROFILE variable 'telescope_azimuth_angle'.
    """

    def __init__(self, f):
        profile = f.groups['PROFILE']
        mmf = profile.groups['MMF']

        self.channel_type = f.channel_type
        group_names = var_name(self.channel_type)
        (self.no2_vis_varname, self.no2_uv_varname,
         self.hcho_varname) = group_names

        # Same two reads for every species: the column density from the MMF
        # sub-group, then the matching QA flag from the PROFILE group.
        for prefix, group_name in zip(('no2_vis', 'no2_uv', 'hcho'),
                                      group_names):
            if group_name is None:
                continue
            column = mmf.groups[group_name].variables[
                'tropospheric_vertical_column_density'][:]
            setattr(self, prefix, column)
            flag = profile.variables[f'qa_flag_{group_name.lower()}'][:]
            setattr(self, f'{prefix}_qf', flag)

        self.scantime = profile.variables['date_of_retrieval'][:]
        self.vaa = profile.variables['telescope_azimuth_angle'][:]


def make_df(data):
    """Build a DataFrame from the data-carrying attributes of ``data``.

    Attribute names are obtained from ``air_toolbox.util.call_attr``. Names
    ending in ``"_varname"`` and the name ``"channel_type"`` are left out;
    every remaining attribute becomes one column keyed by its name.

    Args:
        data: object whose attributes hold the column values
            (e.g., a ``DATA`` instance).

    Returns:
        pandas.DataFrame: one column per retained attribute.
    """
    from air_toolbox import util

    columns = {}
    for name in util.call_attr(data):
        # Skip metadata: group-name holders and the channel type.
        if name.endswith("_varname") or name == "channel_type":
            continue
        columns[name] = getattr(data, name)

    return pd.DataFrame(columns)


def var_name(channel_type):
    """Return the species group names used for a given channel type.

    Args:
        channel_type (str): one of 'UVVIS', 'UV' or 'VIS'.

    Returns:
        tuple: ``(no2_vis_varname, no2_uv_varname, hcho_varname)``. An entry
            is ``None`` when that channel type has no such group
            (no visible NO2 for 'UV', no UV NO2 for 'VIS').

    Raises:
        ValueError: if ``channel_type`` is not one of the supported values.
    """
    names_by_channel = {
        'UVVIS': ('NO2_VIS_428NM', 'NO2_UV_357NM', 'HCHO_343NM'),
        'UV': (None, 'NO2_357NM', 'HCHO_343NM'),
        'VIS': ('NO2_460NM', None, 'HCHO_343NM'),
    }
    try:
        return names_by_channel[channel_type]
    except (KeyError, TypeError):
        # Previously an unknown value fell through every branch and failed
        # with an UnboundLocalError at the return statement.
        raise ValueError(
            f'Unsupported channel_type {channel_type!r}; expected one of '
            f'{sorted(names_by_channel)}') from None


def inst_info(stn):
    """Return the ``out_count`` value associated with a station.

    Args:
        stn (str): name of station (e.g., 'UB_Incheon').

    Returns:
        int: 2 for 'UB_Incheon', 1 for every other station.

    NOTE(review): the value was previously computed but never returned, so
    the function always yielded ``None``. What ``out_count`` represents is
    not shown in this file -- confirm with the callers.
    """
    return 2 if stn == 'UB_Incheon' else 1


def pathfinder(stn):
    """Call filelist for given station

    Collects every ``*.nc`` file located directly inside
    ``<max_doas_datapath>/<stn>``.

    Args:
        stn (str): name of station (e.g., 'UB_Ulsan')

    Returns:
        filelist (list): list of file paths, sorted by path; empty when
            nothing matches
    """
    from air_toolbox.config import max_doas_datapath
    from glob import glob

    # glob order depends on the filesystem; sort so that callers always see
    # the files in the same, reproducible order.
    return sorted(glob(f'{max_doas_datapath}/{stn}/*.nc'))


def read_frm4doas(stn):
    """Read every MAX-DOAS file of a station into a single DataFrame.

    Each file returned by ``pathfinder(stn)`` is opened with
    ``netCDF4.Dataset``, wrapped in ``DATA`` and converted with ``make_df``.
    The per-file frames are then stacked row-wise.

    Args:
        stn (str): name of station (e.g., 'UB_Ulsan')

    Returns:
        pandas.DataFrame or None: rows of all files in the order
            ``pathfinder`` listed them, with a fresh 0..N-1 index; ``None``
            (after printing a message) when no file is found.
    """
    paths = pathfinder(stn)
    if not paths:
        print(f'No file found for {stn}')
        return None

    frames = []
    for path in paths:
        with Dataset(path, 'r') as nc:
            print(path)
            # Build the frame while the file is still open.
            frames.append(make_df(DATA(nc)))

    # Every file yields the same kind of columns, so the frames are stacked
    # as rows. The previous axis=1 concat placed them side by side, which
    # duplicated column names and padded shorter files with NaN. A single
    # concat also avoids re-copying the accumulated frame on every file.
    return pd.concat(frames, axis=0, ignore_index=True)


####################################Read PGN####################################
def read_pgn(prod, qc=True, pan_num="P150"):
    """Read one Pandora (PGN) product and restrict it to the study period.

    Args:
        prod (str): product name; one of 'NO2_TCD', 'NO2_TROP', 'HCHO_TCD'
            or 'HCHO_TROP'. It selects the ``ftype`` passed to
            ``pandora.pgn_readfile`` and is forwarded as ``var_name``.
        qc (bool): when true, keep only rows whose ``QF`` is 0, 1, 10 or 11
            and whose ``nRMS`` is below 0.002.
        pan_num (str): Pandora instrument identifier forwarded to
            ``pandora.pgn_readfile``. Defaults to 'P150'.

    Returns:
        pandas.DataFrame: data indexed by the ``KST`` column, sliced with
            ``.loc['2021-08-01 00:00':'2024-07-31 23:00']``.

    Raises:
        ValueError: if ``prod`` is not a supported product name.
    """
    ftype_by_prod = {
        "NO2_TCD": "rnvs",
        "NO2_TROP": "rnvh",
        "HCHO_TCD": "rfus",
        "HCHO_TROP": "rfuh",
    }
    try:
        ftype = ftype_by_prod[prod]
    except (KeyError, TypeError):
        # Previously an unknown product left `pan` unassigned and failed
        # later with an UnboundLocalError.
        raise ValueError(
            f'Unsupported product {prod!r}; expected one of '
            f'{sorted(ftype_by_prod)}') from None

    from air_toolbox import pandora

    # The reader is always called with qc="no"; quality filtering is done
    # below so that it can be switched with the `qc` argument.
    pan = pandora.pgn_readfile(pan_num=pan_num,
                               ftype=ftype,
                               qc="no",
                               var_name=prod)

    if qc:
        pan = pan[pan["QF"].isin([0, 1, 10, 11])]  # quality control
        pan = pan[pan.nRMS < 0.002]

    # Non-inplace set_index: `pan` may be a filtered slice at this point.
    pan = pan.set_index('KST')

    return pan.loc['2021-08-01 00:00':'2024-07-31 23:00']


def read_car(car_fname, region=None, threshold=1.5e-3):
    """Read a car-DOAS file, apply the RMS filter and optionally crop it.

    Args:
        car_fname: file name forwarded to ``car_doas.car_df``.
        region (str, optional): name of a predefined bounding box. Only
            'Beomseo' is defined. Any non-string value (including ``None``)
            disables the regional selection.
        threshold (float): rows whose ``RMS`` exceeds this value have all
            of their columns set to NaN.

    Returns:
        pandas.DataFrame: without a region, the RMS-filtered frame. With a
            region, only the rows strictly inside the bounding box, plus
            the columns

            * ``<region>``: the in-region flag (True for every kept row),
            * ``dt``: hours elapsed since the previous kept row (from
              ``KST``),
            * ``round``: counter that increases each time ``dt`` exceeds
              one hour.

    Raises:
        ValueError: if ``region`` is a string that names no known region.
    """
    # Bounding boxes as [lon_min, lon_max, lat_min, lat_max].
    region_extents = {
        'Beomseo': [129.18, 129.27, 35.555, 35.58],
    }
    select_region = isinstance(region, str)
    if select_region and region not in region_extents:
        # Previously an unknown region name reached an undefined `extent`
        # and failed with an UnboundLocalError.
        raise ValueError(
            f'Unknown region {region!r}; expected one of '
            f'{sorted(region_extents)}')

    from air_toolbox import car_doas

    car_df = car_doas.car_df(car_fname, timezone='KST')
    ### quality control
    # Blank out the whole row (every column), not only the RMS value.
    car_df[car_df.RMS > threshold] = np.nan

    if not select_region:
        return car_df

    lon_min, lon_max, lat_min, lat_max = region_extents[region]
    # Comparisons against NaN are False, so blanked rows are dropped here.
    in_region = ((car_df.LON > lon_min) & (car_df.LON < lon_max) &
                 (car_df.LAT > lat_min) & (car_df.LAT < lat_max))
    car_df[region] = in_region

    # Explicit copy so the new columns are written to an independent frame
    # rather than to a slice of `car_df`.
    car_df_filter = car_df[in_region].copy()
    car_df_filter['dt'] = (car_df_filter['KST'].diff().dt.total_seconds() /
                           3600)
    car_df_filter['round'] = (car_df_filter['dt'] > 1).cumsum()

    return car_df_filter
